// Work-group width; used as the required x-dimension work-group size of the
// get_offsets kernel (see its reqd_work_group_size attribute).
#define WGS_W 16
// Work-group height. Not referenced in the visible part of this file; the
// kernel attribute hard-codes 1 for the y dimension.
#define WGS_H 1

// NOTE(review): N and NSHIFT are not referenced in the visible part of this
// file. NSHIFT looks like log2(N) (1 << 4 == 16) -- confirm against the users.
#define N 16
#define NSHIFT 4

// Packs one output offset per work-item into offsetsD.
//
// Every work-item writes offsetsD[group*16 + local], where `group` is
// get_group_id(0) and `local` is get_local_id(0):
//   * local == 0 : writes h_ind[group / w_ind_size] * width
//                       + w_ind[group % w_ind_size]
//   * local != 0 : copies offsets[group*256 + local]
//
// Parameters:
//   offsets     source buffer, read with a stride of 256 elements per group
//   offsetsD    destination buffer, written with a stride of 16 elements per group
//   w_ind       table indexed by (group % w_ind_size)
//   h_ind       table indexed by (group / w_ind_size)
//   w_ind_size  divisor used to split the group id into the two table indices;
//               must be non-zero (it is used in a division and a modulo)
//   width       multiplier applied to the h_ind entry
//
// No bounds checking is performed on any buffer; the caller is responsible
// for sizing them to match the launch configuration.
// NOTE(review): the 16-element output stride equals WGS_W in this file, and
// 256 looks like N*N -- confirm before tying them to those macros.
__kernel __attribute__((reqd_work_group_size(WGS_W, 1, 1)))
void get_offsets(
                 __global unsigned*     offsets,
                 __global unsigned*     offsetsD,
                 __global unsigned*     w_ind,
                 __global unsigned*     h_ind,
                 const int              w_ind_size,
                 const int              width
             )
{
    const size_t group = get_group_id(0);
    const size_t lane  = get_local_id(0);

    // Destination slot for this work-item (16 outputs per work-group).
    const int dst = group*16 + lane;

    if (lane == 0)
    {
        // First work-item of the group: build the offset from the two index
        // tables instead of copying it from `offsets`.
        const int col = group % w_ind_size;
        const int row = group / w_ind_size;
        const int origin = h_ind[row]*width + w_ind[col];
        offsetsD[dst] = origin;
        return;
    }

    // Remaining work-items: copy from the source buffer (256 entries per group).
    const int src = group*256 + lane;
    offsetsD[dst] = offsets[src];
}
